pacman::p_load("tidyverse",
              "cem",
              "MatchIt")

# NOTE: this script used to start by clearing the whole workspace with
# remove(list = ls()). That call was dropped: it destroys the objects of any
# session or driver script that source()s this file, and nothing below relies
# on starting from an empty environment.

# Load the IAT data set that every matching exercise below is built from.
iat_data <- read_rds("./data_clean/rep_file_iat_data.RDS")

# Set up the observations to match.
# `explicit` is the new explicit score; `IAT` is the implicit score.
# Only the columns needed for matching / estimation are kept. The two grouping
# variables are converted to factors and `east` is a 0/1 indicator for
# city_2_cat == "East".
matching_data <- iat_data %>%
  dplyr::select(explicit, IAT,
                family_in_Russia, female, age_response,
                unemployed, ethnicity, city_2_cat, parl2014_3) %>%
  mutate(parl2014_3 = as.factor(parl2014_3),
         ethnicity = as.factor(ethnicity),
         east = ifelse(city_2_cat == "East", 1, 0))

# Cutpoints handed to cem() for coarsening age_response.
mycuts <- list(age_response = c(25, 35, 45))

# Complete cases only: any row with an NA in a selected column is dropped.
matching_data_c <- na.omit(matching_data)

# Party-voting data sets.
# Each keeps two levels of parl2014_3 and adds `abstain`, coded 1 for
# "Abstainers" and 0 for the comparison group; rows are sorted by `abstain`.
abstain_v_russia <- matching_data_c %>%
  filter(parl2014_3 %in% c("Abstainers", "Pro-Russia Voters")) %>%
  mutate(abstain = as.numeric(parl2014_3 == "Abstainers")) %>%
  arrange(abstain)

abstain_v_ukr <- matching_data_c %>%
  filter(parl2014_3 %in% c("Abstainers", "Anti-Russia Voters")) %>%
  mutate(abstain = as.numeric(parl2014_3 == "Abstainers")) %>%
  arrange(abstain)

# Ethnicity data sets.
# "Ukrainian only" is the comparison group (coded 0) in both; the indicator
# (`russian` / `mixed`) is 1 for the other retained level. Rows are sorted by
# the `ethnicity` factor.
ukr_v_rus <- matching_data_c %>%
  filter(ethnicity %in% c("Russian only", "Ukrainian only")) %>%
  mutate(russian = as.numeric(ethnicity == "Russian only")) %>%
  arrange(ethnicity)

ukr_v_mix <- matching_data_c %>%
  filter(ethnicity %in% c("Ukr and Rus", "Ukrainian only")) %>%
  mutate(mixed = as.numeric(ethnicity == "Ukr and Rus")) %>%
  arrange(ethnicity)

#here we have abstainers and pro-russia voters
# match_abstain_1 <- matchit(abstain ~ explicit + family_in_Russia + unemployed + age_response +
#                              female + ethnicity + city_2_cat,
#                       data = abstain_v_russia,
#                       method = "cem",
#                       cutpoints = mycuts)
# 
# match_abstain_1_d <- match.data(match_abstain_1)
# 
# match_abstain_1_d_imb <- imbalance(
#   group= match_abstain_1_d$abstain,
#   data= as.data.frame(match_abstain_1_d),
#   drop=c("abstain","IAT", "distance",
#                              "weights","subclass"
#          ))



# Coarsened exact matching for the ethnicity comparisons.
# Every call coarsens age_response with `mycuts`; all other non-dropped
# columns are passed to cem() as they are.

# Ukrainian vs. Russian, implicit outcome: neither IAT nor explicit is matched on.
cem_rus_imp <- cem(
  treatment = "russian",
  data = as.data.frame(ukr_v_rus),
  cutpoints = mycuts,
  drop = c("IAT","city_2_cat", "ethnicity", "explicit"),
  verbose = 3
)

# Ukrainian vs. mixed, implicit outcome: neither IAT nor explicit is matched on.
cem_mix_imp <- cem(
  treatment = "mixed",
  data = as.data.frame(ukr_v_mix),
  cutpoints = mycuts,
  drop = c("IAT","city_2_cat", "ethnicity", "explicit"),
  verbose = 3
)

# Ukrainian vs. Russian, explicit outcome: IAT is not dropped, so the implicit
# score is part of the matching set.
cem_rus_exp <- cem(
  treatment = "russian",
  data = as.data.frame(ukr_v_rus),
  cutpoints = mycuts,
  drop = c("explicit","city_2_cat", "ethnicity"),
  verbose = 3
)

# Ukrainian vs. mixed, explicit outcome: IAT is again part of the matching set.
cem_mix_exp <- cem(
  treatment = "mixed",
  data = as.data.frame(ukr_v_mix),
  cutpoints = mycuts,
  drop = c("explicit","city_2_cat", "ethnicity"),
  verbose = 3
)

# Pull the second column out of a fitted model's `att.model` matrix.
#
# att_model: an object with an `att.model` matrix component (the att() results
#            below are what gets passed in).
# Returns:   column 2 of that matrix as a vector, keeping the matrix row names
#            as element names. Presumably this is the treatment coefficient's
#            column for an `outcome ~ treatment` formula -- confirm against
#            the cem::att documentation.
get_att_est <- function(att_model) {
  att_model$att.model[, 2]
}

# Estimated treatment effects for the ethnicity comparisons.
# Implicit outcome (IAT): matched without explicit.
# Explicit outcome: matched with IAT in the matching set.
cem_rus_att_imp <- att(obj = cem_rus_imp,
                       formula = IAT ~ russian,
                       data = as.data.frame(ukr_v_rus))
cem_mixed_att_imp <- att(obj = cem_mix_imp,
                         formula = IAT ~ mixed,
                         data = as.data.frame(ukr_v_mix))
cem_rus_att_exp <- att(obj = cem_rus_exp,
                       formula = explicit ~ russian,
                       data = as.data.frame(ukr_v_rus))
cem_mixed_att_exp <- att(obj = cem_mix_exp,
                         formula = explicit ~ mixed,
                         data = as.data.frame(ukr_v_mix))

# Order matters: the comparison / type labels below are assigned by position.
ethnic_mods <- list(cem_rus_att_imp, cem_mixed_att_imp,
                    cem_rus_att_exp, cem_mixed_att_exp)

# One row per model, labelled by comparison and outcome type, with a
# normal-approximation 95% interval around each estimate.
att_df_eth <- do.call(bind_rows, lapply(ethnic_mods, get_att_est)) %>%
  mutate(comparison = rep(c("Ukr v. Rus ","Ukr v. Mixed"), 2),
         type = rep(c("Implicit","Explicit"), each = 2),
         lower = Estimate - qnorm(.975) * `Std. Error`,
         upper = Estimate + qnorm(.975) * `Std. Error`)

# Dodged point estimates with 95% error bars for the ethnicity comparisons,
# drawn horizontally, with a dashed reference line at zero.
ggplot(att_df_eth,
       aes(x = comparison, y = Estimate,
           ymin = lower, ymax = upper,
           linetype = type, colour = type)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(width = 0.1, position = position_dodge(width = 0.2)) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey80") +
  coord_flip() +
  labs(x = NULL, title = "Estimated SATT from CEM") +
  scale_y_continuous(name = "Scale") +
  scale_colour_manual(values = c("blue", "red")) +
  theme_classic()

# No plot argument: ggsave() writes the most recently created plot.
ggsave("./plots/att_ethnic.pdf", width = 5, height = 2.5)

# Now do the same for abstention.
# All four calls pass a plain data.frame (via as.data.frame), consistent with
# the ethnicity cem() calls and with the att() calls that reuse these data.

# Abstain vs. pro-Russia, explicit outcome: IAT is not dropped, so the implicit
# score is part of the matching set.
# NOTE(review): this is the only abstention match without `cutpoints = mycuts`
# (the argument is commented out). Left disabled so the match is unchanged --
# confirm whether that is intentional.
cem_abstain_rus_exp <- cem(treatment = "abstain",
                           data = as.data.frame(abstain_v_russia),
                           drop = c("parl2014_3", "explicit"),
                           # cutpoints = mycuts,
                           verbose = 3)

# Abstain vs. pro-Russia, implicit outcome: neither IAT nor explicit is matched on.
# NOTE(review): this is the only call that sets baseline.group = 0; the other
# matches use cem()'s default. Kept as-is -- confirm it is intentional.
cem_abstain_rus_imp <- cem(treatment = "abstain",
                           baseline.group = 0,
                           data = as.data.frame(abstain_v_russia),
                           drop = c("explicit", "parl2014_3", "IAT"),
                           verbose = 3,
                           cutpoints = mycuts)

# Abstain vs. pro-Ukraine (anti-Russia voters), implicit outcome: neither IAT
# nor explicit is matched on.
cem_abstain_ukr_imp <- cem(treatment = "abstain",
                           data = as.data.frame(abstain_v_ukr),
                           drop = c("explicit", "parl2014_3", "IAT"),
                           verbose = 3,
                           cutpoints = mycuts)

# Abstain vs. pro-Ukraine, explicit outcome: IAT is part of the matching set.
cem_abstain_ukr_exp <- cem(treatment = "abstain",
                           data = as.data.frame(abstain_v_ukr),
                           drop = c("explicit", "parl2014_3"),
                           verbose = 3,
                           cutpoints = mycuts)


# Estimate the treatment effect of abstaining for each abstention match.
# The "_imp" models use the implicit outcome (IAT) and the "_exp" models the
# explicit outcome. The pro-Russia implicit model previously used
# `explicit ~ abstain`, so the estimate labelled "Implicit" below was really an
# explicit-score effect; it now uses `IAT ~ abstain` like its pro-Ukraine
# counterpart.
# The variable names are reused on purpose to keep existing names stable: from
# here on each cem_abstain_* object holds the att() result, not the cem match.
cem_abstain_ukr_imp <- att(cem_abstain_ukr_imp, formula = IAT ~ abstain,
                           data = as.data.frame(abstain_v_ukr))
cem_abstain_ukr_exp <- att(cem_abstain_ukr_exp, formula = explicit ~ abstain,
                           data = as.data.frame(abstain_v_ukr))
cem_abstain_rus_imp <- att(cem_abstain_rus_imp, formula = IAT ~ abstain,
                           data = as.data.frame(abstain_v_russia))
cem_abstain_rus_exp <- att(cem_abstain_rus_exp, formula = explicit ~ abstain,
                           data = as.data.frame(abstain_v_russia))

# Order matters: the comparison / type labels below are assigned by position
# (pro-Ukr implicit, pro-Ukr explicit, pro-Russian implicit, pro-Russian explicit).
abstain_mods <- list(cem_abstain_ukr_imp,
                     cem_abstain_ukr_exp,
                     cem_abstain_rus_imp,
                     cem_abstain_rus_exp)

# One row per model with its labels and a normal-approximation 95% interval.
att_df <- do.call(bind_rows, lapply(abstain_mods, FUN = get_att_est))
att_df$comparison <- rep(c("Abstain v. Pro-Ukr", "Abstain v. Pro-Russian"), each = 2)
att_df$type <- rep(c("Implicit", "Explicit"), 2)
att_df <- mutate(att_df,
                 lower = Estimate - qnorm(.975) * `Std. Error`,
                 upper = Estimate + qnorm(.975) * `Std. Error`)
                        
# Dodged point estimates with 95% error bars for the abstention comparisons,
# drawn horizontally, with a dashed reference line at zero.
ggplot(att_df,
       aes(x = comparison, y = Estimate,
           ymin = lower, ymax = upper,
           colour = type, linetype = type)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(width = 0.1, position = position_dodge(width = 0.2)) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey80") +
  coord_flip() +
  labs(x = NULL, title = "Estimated SATT from CEM") +
  scale_y_continuous(name = " Outcome Units") +
  scale_colour_manual(values = c("blue", "red")) +
  theme_classic()

# No plot argument: ggsave() writes the most recently created plot.
ggsave("./plots/att_abstain.pdf", width = 5, height = 2.5)
# 
# cem_match_att <- att(obj = cem_abstain,
#                      formula = IAT ~ abstain + female + city_2_cat + unemployed + family_in_Russia,
#                      data = as.data.frame(abstain_v_russia),
#                      model="linear")
# 
# cem_match_at_exp <- att(obj = cem_abstain,
#                      formula = russia_exp_m ~ IAT + abstain + female + city_2_cat + unemployed + family_in_Russia,
#                      data = as.data.frame(abstain_v_russia),
#                      model="linear")
# 
# 
# 
# cem_match_att_ukrRE <- att(obj = cem_abstain_ukr,
#                      formula = IAT ~ abstain + female + city_2_cat + unemployed + family_in_Russia +age_response,
#                      data = as.data.frame(abstain_v_ukr),
#                      model="linear-RE")
# 
# cem_match_att_ukr <- att(obj = cem_abstain_ukr,
#                          formula = IAT ~ abstain  + female + city_2_cat + unemployed + family_in_Russia + age_response,
#                          data = as.data.frame(abstain_v_ukr),
#                          model="linear")
# 
# cem.match.att2 <- att(obj = cem_abstain_ukr,
#                      formula = IAT ~ abstain,
#                      data = as.data.frame(abstain_v_ukr),
#                      model="linear")
# 
# plot(cem.match.att, cem_abstain_ukr, as.data.frame(abstain_v_ukr))
# 
# cem_match_all <- att(obj = cem_abstain_all,
#                      formula = IAT ~ parl2014_3 ,
#                      data = as.data.frame(matching_data_c),
#                      model="linear")
# 
# 
# mydrops <- c("abstain","IAT")
# mymatch <-  c("russia_exp_m", "age_response")      
# myfrontier <- makeFrontier(dataset = as.data.frame(abstain_v_russia),
#                            treatment = 'abstain',
#                            match.on = mymatch,
#                            QOI = 'FSATT')
# 
# myests <- frontierEst(myfrontier,
#                       as.data.frame(abstain_v_russia),
#                       myform = formula(IAT ~ abstain),
#                       treatment = "abstain")
# 
# #we have 19 cases of missing data on treatment if treatment is voting behavior
# mat <- imbalance(matching_data$ethnicity, matching_data, drop = "IAT")
# mat <- imbalance(matching_data$parl2014_3, matching_data, drop = "IAT")
# 
# 
# user_data <- match.data(test_match)
# 
# cem_unemployed<- cem(treatment="unemployed", matching_data_c, drop="IAT",
#                keep.all=TRUE, baseline.group = 0,
#                cutpoints = list(age = c(18, 35, 80)),
#                verbose = 3)
# 
# cem_kyiv <- cem(treatment="east", matching_data_c, drop=c("IAT","city_2_cat"),
#                      keep.all=TRUE, baseline.group =  0,
#                      cutpoints = list(age_response = c(18, 35, 80)),
#                      verbose = 3)
# 
# cem_family <- cem(treatment="family_in_Russia", matching_data_c, drop=c("IAT","city_2_cat"),
#                 keep.all=TRUE, baseline.group =  0,
#                 cutpoints = list(age_response = c(18, 35, 80)),
#                 verbose = 3)
# 
# cem_kyiv_att <- att(cem_kyiv,IAT~east, as.data.frame(matching_data_c))
# cem_family_att <- att(cem_family,IAT~family_in_Russia, as.data.frame(matching_data_c))
# cem_unemployed_att <- att(cem_unemployed,IAT~unemployed, as.data.frame(matching_data_c))
# 
# tmp <- cemspace("Parl2014_d", na.omit(matching_data), drop="IAT", M=50)
# 
# 
# ###
# re74cut <- seq(0, 40000, 5000)
# re75cut <- seq(0, max(LL$re75)+1000, by=1000)
# agecut <- c(20.5, 25.5, 30.5,35.5,40.5)
# my.cutpoints <- list(re75=re75cut, re74=re74cut, age=agecut)
# 
# cem.match <- cem(treatment = "treated",
#                  data = LL, drop = "re78",
#                  cutpoints = my.cutpoints)
# 
# cem.match.att <- att(obj=cem.match, formula=re78 ~ treated,
#                      data = LL, model="linear")